In [1]:
using Gadfly
using RandomMatrices
In [2]:
#Let's generate a lot of Gaussian random variables
t = 1000 #Number of trials
data = randn(t);
_, b = hist(data, -3:0.5:3)
plot(x=-3:0.5:3, y=b, Geom.bar)
#plot(x=data, Geom.histogram)
#The answer is an approximation to the Bell curve
Out[2]:
In [5]:
#How about matrices of Gaussians, rather than just vectors of Gaussians?
#Let's look at the elements of a matrix using a spy plot.
A=randn(5,5)
A=A+A'
eigvals(A)
spy(A)
Out[5]:
In [6]:
#What happens when I look at the statistics of eigenvalues?
n = 100 #Dimension of matrix to generate
t = 1000 #Number of trials
rawdata={}
for i=1:t
A = randn(n,n)
A = (A+A')/sqrt(2n)
push!(rawdata, A)
end
@time data = [eigvals(A) for A in rawdata]
data = [data...]; #Flattens the data
size(data)
Out[6]:
In [7]:
#This is not a Bell curve, but instead is a semicircle!
plot(x=data, Geom.histogram)
Out[7]:
In [8]:
#Let's look at another statistic we could generate, which is
#the distribution of the largest eigenvalue
n = 100 #Dimension of matrix to generate
t = 1000 #Number of trials
@time data=[eigmax(A) for A in rawdata]
size(data)
Out[8]:
In [9]:
#The resulting plot shows what is called the Tracy-Widom distribution
plot(x=data, Geom.histogram)
Out[9]:
In [10]:
#The RandomMatrices package provides a way to generate tridiagonal random matrices
methods(tridrand)
Out[10]:
In [66]:
#So let's generate a bunch of these matrices and look at them!
n = 100 #Dimension of matrix to generate
t = 1000 #Number of trials
rawdata_trid = [tridrand(GaussianHermite(2), n) for i=1:t];
rawdata_trid[1] #Look at one of these matrices
Out[66]:
In [67]:
#Collect eigenvalue statistics. Note that is MUCH faster than before!
@time data=[eigvals(A) for A in rawdata_trid]
data=[data...]
plot(x=data, Geom.histogram)
Out[67]:
In [68]:
#Collect maximal eigenvalue statistics. Note that is also MUCH faster than before!
@time data=[eigmax(A) for A in rawdata_trid]
@show data[1]
plot(x=data, Geom.histogram)
Out[68]:
In [14]:
#Let's look at why eigvals and eigmax are so much faster than before.
#Earlier we were working with dense full matrices
spy(rawdata[1])
Out[14]:
In [15]:
#But the second time round we were generating special matrices!
spy(full(rawdata_trid[1])) #This has the vertical axis flipped
Out[15]:
In [18]:
#Let's look at the individual matrix elements
data_topleft = [A[51,10] for A in rawdata] #Each dense matrix element is a Gaussian.
plot(x=data_topleft, Geom.histogram)
Out[18]:
In [69]:
#But the off-diagonal elements of the random tridiagonal are different
data_topleft = [A.ev for A in rawdata_trid]
data_topleft = [data_topleft...]
plot(x=data_topleft, Geom.histogram) #Very clearly NOT Gaussian - a clue at what's going on...
Out[69]:
In [23]:
#The answer is multiple dispatch. Given a particular function:
help(eigvals)
In [24]:
#Julia knows multiple ways to calculate eigenvalues
methods(eigvals)
Out[24]:
In [70]:
#The first time we used this method for dense matrices:
@show typeof(rawdata[1])
@which eigvals(rawdata[1])
#which calls this LAPACK routine: http://www.netlib.no/netlib/lapack/double/dgeev.f
In [71]:
#and the second time we used this method for symmetric tridiagonal matrices:
@show typeof(rawdata_trid[1])
@which eigvals(rawdata_trid[1])
#which calls this LAPACK routine: http://www.netlib.no/netlib/lapack/double/dstev.f
In [34]:
HilbertMatrix = [1//(i+j) for i=1:5, j=1:5]
Out[34]:
In [38]:
#With rationals, addition is exact
1//10 + 2//10
Out[38]:
In [39]:
#which is not always the case with floating-point
0.1 + 0.2
Out[39]:
In [73]:
#Julia supports matrix problems using Rationals, so you can get numerically exact solutions
b = [1//i for i=1:5]
HilbertMatrix \ b
Out[73]:
In [50]:
#We also have some support for matrix factorizations also
lufact(HilbertMatrix)
Out[50]:
In [51]:
#We have a special function to compute the largest eigenvalue
methods(eigmax)
Out[51]:
In [55]:
@time rawdata_trid = [tridrand(GaussianHermite(2), 300) for i=1:1000];
In [55]:
@time data = [maximum(eigvals(A)) for A in rawdata_trid];
In [55]:
@time data = [eigmax(A) for A in rawdata_trid]; #significantly faster!
In [57]:
Q, R = qr(randn(3,3))
Out[57]:
In [58]:
Q'*Q
Out[58]:
In [60]:
A=qrfact(randn(3,3))
Out[60]:
In [61]:
names(A) #A convenient way to find out the fields of a given type
Out[61]:
In [62]:
A.factors
Out[62]:
In [63]:
A.T
Out[63]:
In [ ]: